The link to my github repository: homework 5
Problem 1
a.
Show the code
setClass (
"waldCI" ,
slots = list (
lb = "numeric" ,
ub = "numeric" ,
mean = "numeric" ,
sterr = "numeric" ,
level = "numeric"
),
validity = function (object) {
if (! is.finite (object@ level) || object@ level <= 0 || object@ level >= 1 )
return ("level must be between 0 and 1." )
if (! is.finite (object@ lb) || ! is.finite (object@ ub))
return ("the bound is infinite." )
if (! is.na (object@ lb) && ! is.na (object@ ub)) {
if (object@ lb >= object@ ub)
return ("lb must be < ub." )
}
TRUE
}
)
############## CONSTRUCTOR ##############
waldCI <- function (level, mean = NULL , sterr = NULL ,
lb = NULL , ub = NULL ) {
z <- qnorm (1 - (1 - level)/ 2 )
if (! is.null (mean) && ! is.null (sterr)){
lb <- mean - z * sterr
ub <- mean + z * sterr
} else if (! is.null (lb) && ! is.null (ub)) {
mean <- (lb + ub) / 2
sterr <- (ub - lb) / (2 * z)
} else {
stop ("You must provide either (mean, sterr) or (lb, ub)." )
}
obj <- new ("waldCI" ,
lb = lb, ub = ub,
mean = mean, sterr = sterr,
level = level)
validObject (obj)
obj
}
############## SHOW METHOD ##############
setMethod ("show" , "waldCI" ,
function (object) {
cat ("Wald Confidence Interval \n " )
cat (sprintf ("Level: %.3f \n " , object@ level))
cat (sprintf ("Mean: %.4f \n " , object@ mean))
cat (sprintf ("SE: %.4f \n " , object@ sterr))
cat (sprintf ("CI: [%.4f, %.4f] \n " , object@ lb, object@ ub))
})
############## ACCESSORS ##############
setGeneric ("lb" , function (x) standardGeneric ("lb" ))
Show the code
setGeneric ("ub" , function (x) standardGeneric ("ub" ))
Show the code
setGeneric ("mean" , function (x) standardGeneric ("mean" ))
Show the code
setGeneric ("sterr" , function (x) standardGeneric ("sterr" ))
Show the code
setGeneric ("level" , function (x) standardGeneric ("level" ))
Show the code
setMethod ("lb" , "waldCI" , function (x) x@ lb)
setMethod ("ub" , "waldCI" , function (x) x@ ub)
setMethod ("mean" , "waldCI" , function (x) x@ mean)
setMethod ("sterr" , "waldCI" , function (x) x@ sterr)
setMethod ("level" , "waldCI" , function (x) x@ level)
############## SETTERS (WITH VALIDATION) ##############
setGeneric ("lb<-" , function (x, value) standardGeneric ("lb<-" ))
Show the code
setGeneric ("ub<-" , function (x, value) standardGeneric ("ub<-" ))
Show the code
setGeneric ("mean<-" , function (x, value) standardGeneric ("mean<-" ))
Show the code
setGeneric ("sterr<-" , function (x, value) standardGeneric ("sterr<-" ))
Show the code
setGeneric ("level<-" , function (x, value) standardGeneric ("level<-" ))
Show the code
setMethod ("lb<-" , "waldCI" ,
function (x, value) {
x@ lb <- value; validObject (x); x
})
setMethod ("ub<-" , "waldCI" ,
function (x, value) {
x@ ub <- value; validObject (x); x
})
setMethod ("mean<-" , "waldCI" ,
function (x, value) {
x@ mean <- value; validObject (x); x
})
setMethod ("sterr<-" , "waldCI" ,
function (x, value) {
x@ sterr <- value; validObject (x); x
})
setMethod ("level<-" , "waldCI" ,
function (x, value) {
x@ level <- value; validObject (x); x
})
############## CONTAINS FUNCTION ##############
setGeneric ("contains" , function (object, value) standardGeneric ("contains" ))
Show the code
setMethod ("contains" , c ("waldCI" ,"numeric" ),
function (object, value) {
value >= object@ lb & value <= object@ ub
})
############## OVERLAP FUNCTION ##############
setGeneric ("overlap" , function (object1, object2) standardGeneric ("overlap" ))
Show the code
setMethod ("overlap" , c ("waldCI" ,"waldCI" ),
function (object1, object2) {
! (object1@ ub < object2@ lb || object1@ lb > object2@ ub)
})
############## AS.NUMERIC ##############
setMethod ("as.numeric" , "waldCI" ,
function (x) c (x@ lb, x@ ub))
############## TRANSFORM CI ##############
setGeneric ("transformCI" , function (ci, f) standardGeneric ("transformCI" ))
Show the code
setMethod ("transformCI" , c ("waldCI" ,"function" ),
function (ci, f) {
warning ("Only monotonic transformations preserve CI meaning." )
lb2 <- f (ci@ lb)
ub2 <- f (ci@ ub)
waldCI (level = ci@ level,
lb = min (lb2, ub2),
ub = max (lb2, ub2))
})
b.
Show the code
ci1 <- waldCI (level = 0.95 , lb = 17.2 , ub = 24.7 )
ci2 <- waldCI (level = 0.99 , mean = 13 , sterr = 2.5 )
ci3 <- waldCI (level = 0.75 , lb = 27.43 , ub = 39.22 )
Show the code
Wald Confidence Interval
Level: 0.950
Mean: 20.9500
SE: 1.9133
CI: [17.2000, 24.7000]
Show the code
Wald Confidence Interval
Level: 0.990
Mean: 13.0000
SE: 2.5000
CI: [6.5604, 19.4396]
Show the code
Wald Confidence Interval
Level: 0.750
Mean: 33.3250
SE: 5.1245
CI: [27.4300, 39.2200]
Show the code
Show the code
Show the code
Show the code
Show the code
Show the code
Show the code
Show the code
Show the code
lb (ci2) <- 10.5
mean (ci3) <- 34
level (ci3) <- .8
contains (ci1, 17 )
Show the code
Show the code
Show the code
eci1 <- transformCI (ci1, sqrt)
Warning in transformCI(ci1, sqrt): Only monotonic transformations preserve CI
meaning.
Show the code
Wald Confidence Interval
Level: 0.950
Mean: 4.5586
SE: 0.2099
CI: [4.1473, 4.9699]
Show the code
mean (transformCI (ci2, log))
Warning in transformCI(ci2, log): Only monotonic transformations preserve CI
meaning.
c.
Show the code
#negative standard error
waldCI (level = 0.95 , mean = 1 , sterr = - 1 )
Error in validObject(.Object): invalid class "waldCI" object: lb must be < ub.
Show the code
#lb > ub
waldCI (level = 0.95 , lb = 2 , ub = 1 )
Error in validObject(.Object): invalid class "waldCI" object: lb must be < ub.
Show the code
#Infinite bounds
waldCI (level = 0.95 , lb = - Inf , ub = Inf )
Error in validObject(.Object): invalid class "waldCI" object: the bound is infinite.
Show the code
#invalid use of the setters
ci <- waldCI (level = 0.95 , lb = 1 , ub = 2 )
lb (ci) <- 3
Error in validObject(x): invalid class "waldCI" object: lb must be < ub.
Problem 3
Show the code
data <- read.csv ("https://raw.githubusercontent.com/nytimes/covid-19-data/refs/heads/master/rolling-averages/us-states.csv" )
head (data)
date geoid state cases cases_avg cases_avg_per_100k deaths
1 2020-01-21 USA-53 Washington 1 0.14 0 0
2 2020-01-22 USA-53 Washington 0 0.14 0 0
3 2020-01-23 USA-53 Washington 0 0.14 0 0
4 2020-01-24 USA-53 Washington 0 0.14 0 0
5 2020-01-24 USA-17 Illinois 1 0.14 0 0
6 2020-01-25 USA-53 Washington 0 0.14 0 0
deaths_avg deaths_avg_per_100k
1 0 0
2 0 0
3 0 0
4 0 0
5 0 0
6 0 0
Show the code
[1] "date" "geoid" "state"
[4] "cases" "cases_avg" "cases_avg_per_100k"
[7] "deaths" "deaths_avg" "deaths_avg_per_100k"
###a. How many major and minor spikes in cases were there?
Show the code
#chatgpt taught me how to use ymd() to change date into date type so that it can be further manipulated.
library (plotly)
Loading required package: ggplot2
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
Show the code
Attaching package: 'dplyr'
The following object is masked _by_ '.GlobalEnv':
contains
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
Show the code
Attaching package: 'lubridate'
The following objects are masked from 'package:base':
date, intersect, setdiff, union
Show the code
covid <- data %>%
group_by (date) %>%
summarise (usa_cases = mean (cases_avg_per_100k))
covid$ date <- ymd (covid$ date)
fig <- covid %>%
plot_ly (
x = ~ date,
y = ~ usa_cases,
type = 'scatter' ,
mode = 'lines+markers' ,
line = list (width = 2 )
) %>%
layout (
xaxis = list (
title = "Date" ,
tickformat = "%b %Y" ,
dtick = "M3" ,
tickangle = 45
),
yaxis = list (title = "Cases per 100k" ),
template = "plotly_white"
)
fig
b.
For the states with the highest and lowest overall rates per population, what differences do you see in their trajectories over time?
Show the code
states <- data %>%
group_by (state) %>%
summarise (overall_rate = mean (cases_avg,na.rm = TRUE ))
high_state <- states %>% slice_max (overall_rate, n = 1 ) %>% pull (state)
low_state <- states %>% slice_min (overall_rate, n = 1 ) %>% pull (state)
prob_b <- data %>%
filter (state %in% c (high_state,low_state))
prob_b$ date <- ymd (prob_b$ date)
fig <- prob_b %>%
plot_ly (
x = ~ date,
y = ~ cases_avg,
color = ~ state,
type = "scatter" ,
mode = "lines" ,
line = list (width = 2 )
) %>%
layout (
xaxis = list (
title = "Date" ,
tickformat = "%b %Y" ,
dtick = "M3" ,
tickangle = 45
),
yaxis = list (title = "Cases per day" ),
template = "plotly_white"
)
fig
Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
c.
Identify, to the best of your ability without a formal test, the first five states to experience Covid in a substantial way.
Show the code
first_wave <- data %>%
filter (cases_avg_per_100k > 1 ) %>%
group_by (state) %>%
summarize (first_date = min (date)) %>%
arrange (first_date) %>%
slice_head (n = 5 )
first_states <- first_wave$ state
cat ("The first five states to ecperience Covid in a substantial way are" , first_states, sep = ", " )
The first five states to ecperience Covid in a substantial way are, Washington, New York, Guam, Louisiana, New Jersey
Show the code
plot_data <- data %>%
filter (state %in% first_states) %>%
mutate (date = ymd (date))
fig <- plot_data %>%
plot_ly (
x = ~ date,
y = ~ cases_avg_per_100k,
color = ~ state,
type = "scatter" ,
mode = "lines" ,
line = list (width = 1 )
) %>%
layout (
xaxis = list (
title = "Date" ,
tickformat = "%b %Y" , # Month Year
dtick = "M3" , # every 3 months
tickangle = 45
),
yaxis = list (title = "Cases per 100k" ),
template = "plotly_white"
)
fig
The first five states to ecperience Covid in a substantial way are: Washington, New York, Guam, Louisiana, New Jersey.